################################################################################
## Paper:     Working the Crowd
## Authors:   P. Mongrain, N. Fréchet, B. Thompson Collart, and Y. Dufresne
## Date:      February 2025
################################################################################

################################################################################
## IMPORTANT NOTE
################################################################################

# Please run the Stata script "merge.do" *before* running this script: it creates the "merge2.dta" file loaded below

################################################################################
## SET WORKING DIRECTORY
################################################################################

# Report the current working directory (useful when running interactively).
getwd()

# Switch to the replication folder. The path below is specific to the original
# author's machine and must be edited before running; the relative path used
# further down to load the data ("merge2.dta") is resolved against it.
setwd("C:/Users/pmongrain/Desktop/main/article_5/submission_cjps/") # add appropriate path here

################################################################################
## LOAD PACKAGES 
################################################################################

#remotes::install_github("NightingaleHealth/ggforestplot")

library(cowplot)
library(devtools)
library(dfoptim)
library(dotwhisker)
library(dplyr) 
library(effects)
library(emmeans)
library(ggalt)
library(ggeasy)
library(ggeffects)
library(ggforestplot)
library(ggplot2)
library(ggpubr)
library(ggridges)
library(grid)
library(gridExtra)
library(haven)
library(infer)
library(interplot)
library(kableExtra)
library(lattice)
library(lme4)
library(lubridate)
library(magrittr)
library(marginaleffects)
library(margins)
library(prettyR)
library(psych)
library(readxl)
library(rms)
library(scales)
library(sjlabelled)
library(sjmisc)
library(sjPlot)
library(srvyr)
library(tibble)
library(tidyverse)
library(visreg)
library(writexl)

################################################################################
## LOAD DATA
################################################################################

## IMPORT DATA FROM STATA

# "merge2.dta" is created by running the "merge.do" file; it is read from the
# current working directory.
merge <- read_dta("merge2.dta")

## DEFINE VARIABLES AS FACTOR OR NUMERIC

# Columns treated as categorical
factor.vars <- c(
  "correct_district", "vote_district", "postsec", "male", "age55",
  "highinc", "reelected", "boundary", "pidstatus_district"
)

# Columns treated as continuous
numeric.vars <- c(
  "margin", "interest", "interest_n", "pidscale_district", "time",
  "diversity_educ", "diversity_age", "diversity_male", "diversity_income",
  "diversity_choice", "diversity_pid", "diversity_all", "diversity_time",
  "diversity_interest"
)

# Convert each group of columns in one step
merge[factor.vars] <- lapply(merge[factor.vars], as.factor)
merge[numeric.vars] <- lapply(merge[numeric.vars], as.numeric)

# Relabel the 0/1 levels of the vote variable as "No"/"Yes"
merge$vote_district <- factor(
  merge$vote_district,
  levels = c(0, 1),
  labels = c("No", "Yes")
)

################################################################################
## FIGURE 1: SAMPLE SIZES (BY ELECTION)
################################################################################

## PREPARE DATA

# Work on a copy so that `merge` itself is left untouched

merge.all <- merge

# Define forecast variable as numeric.
# `correct_district` was turned into a factor when the data were loaded, and
# as.numeric() on a factor returns the internal level codes (1, 2, ...) rather
# than the recorded values. Going through as.character() recovers the values
# actually stored in the data.

merge.all$correct_district <- as.numeric(as.character(merge.all$correct_district))

# Add column with number of observations by election and district.
# NOTE(review): `obs` is tallied before rows with missing values are dropped
# below, so the 60-observation threshold also counts respondents whose
# forecast is missing -- confirm this is intended.

merge.all <- merge.all %>%
  dplyr::group_by(election, district_code) %>%
  dplyr::add_tally(name = "obs")

# Keep districts with 60 observations or more only

merge.all <- merge.all %>%
  tidyr::drop_na(district_code, correct_district) %>%
  dplyr::filter(obs >= 60)

# Keep election, vote, district code, district outcome, district forecast, and
# correct forecast variables only

merge.all <- subset(
  merge.all,
  select = c(election, vote_district, district_code, district_outcome,
             forecast_district, correct_district)
)

# Make district codes unique across elections by appending the election label

merge.all$district_code <- paste(merge.all$district_code, merge.all$election)

# One data set per election:
# 2011, 2015 and 2019 Canadian federal elections

merge.all.ca2011 <- merge.all %>% dplyr::filter(election == "ca2011")
merge.all.ca2015 <- merge.all %>% dplyr::filter(election == "ca2015")
merge.all.ca2019 <- merge.all %>% dplyr::filter(election == "ca2019")

# 2011 and 2014 Ontario general elections

merge.all.on2011 <- merge.all %>% dplyr::filter(election == "on2011")
merge.all.on2014 <- merge.all %>% dplyr::filter(election == "on2014")

# 2022 Quebec general election

merge.all.qc2022 <- merge.all %>% dplyr::filter(election == "qc2022")

# Number of distinct districts retained, by election and overall

length(unique(merge.all.ca2011$district_code))
length(unique(merge.all.ca2015$district_code))
length(unique(merge.all.ca2019$district_code))
length(unique(merge.all.on2011$district_code))
length(unique(merge.all.on2014$district_code))
length(unique(merge.all.qc2022$district_code))
length(unique(merge.all$district_code))

# Set seed for replication

set.seed(1234)

## HELPERS FOR THE RANDOM-SAMPLE SIMULATIONS
##
## The draws are made in the same order as before (election by election, and
## within an election from the smallest to the largest sample size), so the
## random-number stream started by set.seed() above is consumed identically.
## dplyr/tidyr verbs are namespace-qualified because several packages attached
## after dplyr export functions with the same names.

# Draw `n.reps` random samples (with replacement) of `sample.size` rows within
# each district of `election.data`.
#   election.data: rows of one election, with a `district_code` column
#   sample.size:   number of rows drawn per district and replicate
#   n.reps:        number of replicates per district
# Returns the sampled rows, one data frame for all districts, with the sample
# size stored (as a double) in the `size` column.
draw_district_samples <- function(election.data, sample.size, n.reps = 10) {
  election.data %>%
    dplyr::group_by(district_code) %>%
    tidyr::nest() %>%
    dplyr::mutate(
      v = purrr::map(
        data,
        function(district.rows) {
          infer::rep_sample_n(district.rows, size = sample.size,
                              replace = TRUE, reps = n.reps)
        }
      )
    ) %>%
    tidyr::unnest(v) %>%
    dplyr::mutate(size = as.numeric(sample.size))
}

# For every district, replicate and sample size, find the most frequent
# forecast in the sample and flag whether it equals the district outcome.
#   samples: pooled output of draw_district_samples()
# Returns one row per district/replicate/size with `most_frequent` and
# `correct` (1 = most frequent forecast matches the outcome, 0 = it does not).
# Note: which.max() returns the first maximum, so when two forecasts are tied
# the one that comes first in the table() ordering is retained.
score_crowd_forecasts <- function(samples) {
  crowd <- samples %>%
    dplyr::group_by(district_code, district_outcome, replicate, size) %>%
    dplyr::summarise(
      most_frequent = names(which.max(table(forecast_district)))
    )
  crowd$most_frequent <- as.numeric(crowd$most_frequent)
  crowd$correct <- as.numeric(crowd$most_frequent == crowd$district_outcome)
  crowd
}

# Run the whole simulation for one election.
#   election.data: rows of one election
#   sizes:         sample sizes to simulate
#   n.reps:        number of replicates per district and sample size
# Returns a list with
#   by.size: list of sampled data frames, one per sample size
#   pooled:  all sampled rows stacked together
#   crowd:   output of score_crowd_forecasts()
#   wisdom:  percentage of correct most-frequent forecasts by sample size
simulate_crowd_accuracy <- function(election.data, sizes = 1:30, n.reps = 10) {
  by.size <- lapply(sizes, function(k) {
    draw_district_samples(election.data, k, n.reps)
  })
  pooled <- do.call(rbind, by.size)
  crowd <- score_crowd_forecasts(pooled)
  wisdom <- crowd %>%
    dplyr::group_by(size) %>%
    dplyr::summarise(pct_correct = 100 * mean(correct))
  list(by.size = by.size, pooled = pooled, crowd = crowd, wisdom = wisdom)
}

## DRAW RANDOM SAMPLES: 2011 CANADIAN FEDERAL ELECTION

sim.ca2011 <- simulate_crowd_accuracy(merge.all.ca2011)

# Keep the per-size objects (random.all.ca2011.1, ..., random.all.ca2011.30)
# available under their previous names.
# NOTE(review): these are only kept in case later parts of the script refer to
# them; drop the next two statements once that is confirmed not to be the case.
names(sim.ca2011$by.size) <- paste0("random.all.ca2011.",
                                    seq_along(sim.ca2011$by.size))
invisible(list2env(sim.ca2011$by.size, envir = environment()))

random.all.ca2011 <- sim.ca2011$pooled
random.all.ca2011.max <- sim.ca2011$crowd
random.all.ca2011.wisdom <- sim.ca2011$wisdom

write_xlsx(random.all.ca2011.wisdom,
           "C:/Users/pmongrain/Desktop/main/article_5/weighting/random_all_ca2011_wisdom.xlsx")

## DRAW RANDOM SAMPLES: 2015 CANADIAN FEDERAL ELECTION

sim.ca2015 <- simulate_crowd_accuracy(merge.all.ca2015)

# Keep the per-size objects (random.all.ca2015.1, ..., random.all.ca2015.30)
# available under their previous names (see the note for 2011).
names(sim.ca2015$by.size) <- paste0("random.all.ca2015.",
                                    seq_along(sim.ca2015$by.size))
invisible(list2env(sim.ca2015$by.size, envir = environment()))

random.all.ca2015 <- sim.ca2015$pooled
random.all.ca2015.max <- sim.ca2015$crowd
random.all.ca2015.wisdom <- sim.ca2015$wisdom

write_xlsx(random.all.ca2015.wisdom,
           "C:/Users/pmongrain/Desktop/main/article_5/weighting/random_all_ca2015_wisdom.xlsx")

## DRAW RANDOM SAMPLES: 2019 CANADIAN FEDERAL ELECTION

# Sample sizes 1 to 30: for each size, draw 10 samples (with replacement)
# within every district and store the result as random.all.ca2019.<size>.
# The sizes are processed in increasing order, i.e. in the same order as the
# draws were written out one by one.

for (draw.size in 1:30) {
  # One row per district, with that district's rows nested in `data`
  nested.districts <- tidyr::nest(
    dplyr::group_by(merge.all.ca2019, district_code)
  )

  # Replicated samples of the current size, district by district
  nested.districts <- mutate(
    nested.districts,
    v = map(
      data,
      function(district.rows) {
        rep_sample_n(district.rows, size = draw.size, replace = TRUE, reps = 10)
      }
    )
  )

  assign(
    paste0("random.all.ca2019.", draw.size),
    unnest(nested.districts, v)
  )
}

# Remove the loop helpers so that only the sample objects remain
rm(nested.districts, draw.size)

# Create variable for sample size
#
# Each per-size data frame random.all.ca2019.<k> (k = 1..30) gets a numeric
# column `size` equal to k, i.e. the number of rows drawn per district and
# replicate when that data frame was created.

for (draw.size in as.numeric(1:30)) {
  resample.name <- paste0("random.all.ca2019.", draw.size)
  resample <- get(resample.name)
  # .env$ makes sure the loop value is used even if the survey data happened
  # to contain a column called draw.size.
  assign(resample.name, resample %>% mutate(size = .env$draw.size))
}
rm(draw.size, resample.name, resample)

# Merge dataframes for varying sample sizes
#
# The 30 per-size data frames are looked up by name and stacked in increasing
# order of sample size. unname() keeps the rbind() arguments positional.

random.all.ca2019 <- do.call(
  rbind,
  unname(mget(paste0("random.all.ca2019.", 1:30)))
)

# Crowd forecast for each district x replicate x sample size: the value of
# forecast_district that occurs most often in the resample.
# which.max() returns the first maximum, so ties go to the value that comes
# first in table()'s sorted order. table() ignores missing forecasts.
# NOTE(review): if a resample contained only missing forecasts, no
# most_frequent value would be produced for that group - confirm whether
# this can occur in the data.
random.all.ca2019.max <- random.all.ca2019 %>%
  dplyr::group_by(district_code, district_outcome, replicate, size) %>%
  summarize(most_frequent = names(which.max(table(forecast_district))))

# names() yields character codes; convert them back to numbers.
random.all.ca2019.max$most_frequent <- as.numeric(
  random.all.ca2019.max$most_frequent
)

# 1 if the crowd forecast equals the district outcome, 0 otherwise.
random.all.ca2019.max$correct <- as.numeric(
  random.all.ca2019.max$most_frequent == random.all.ca2019.max$district_outcome
)

# Percentage of correct crowd forecasts for each sample size.
random.all.ca2019.wisdom <- random.all.ca2019.max %>%
  dplyr::group_by(size) %>%
  summarise(pct_correct = 100 * mean(correct))

# NOTE(review): absolute path on the author's machine, in a different folder
# from the working directory set at the top of the script.
write_xlsx(
  random.all.ca2019.wisdom,
  "C:/Users/pmongrain/Desktop/main/article_5/weighting/random_all_ca2019_wisdom.xlsx"
)

## DRAW RANDOM SAMPLES: 2022 QUEBEC GENERAL ELECTION

# Sample sizes: 1 to 30
#
# For every sample size k in 1..30, respondents are nested by district and
# each district is resampled with replacement: 10 replicates of k rows
# (rep_sample_n). A numeric column `size` then records k.
# Sizes are handled one after the other in increasing order, which fixes the
# order of the random draws; they are kept as doubles so that `size` has the
# same type as when it was written as a literal (1, 2, ..., 30).

qc2022.resamples <- lapply(as.numeric(1:30), function(draw.size) {
  # Built outside mutate() so that the sample size is never looked up among
  # the survey columns by data masking.
  resample.district <- function(district.data) {
    rep_sample_n(district.data, size = draw.size, replace = TRUE, reps = 10)
  }
  merge.all.qc2022 %>%
    dplyr::group_by(district_code) %>%
    tidyr::nest() %>%
    mutate(v = map(data, resample.district)) %>%
    unnest(v) %>%
    mutate(size = .env$draw.size)
})
names(qc2022.resamples) <- paste0("random.all.qc2022.", 1:30)

# Keep random.all.qc2022.1 ... random.all.qc2022.30 available as individual
# objects in case they are used elsewhere in the script.
invisible(list2env(qc2022.resamples, envir = environment()))

# Merge dataframes for varying sample sizes

random.all.qc2022 <- do.call(rbind, unname(qc2022.resamples))

# Crowd forecast for each district x replicate x sample size: the value of
# forecast_district that occurs most often in the resample.
# which.max() returns the first maximum, so ties go to the value that comes
# first in table()'s sorted order. table() ignores missing forecasts.
# NOTE(review): if a resample contained only missing forecasts, no
# most_frequent value would be produced for that group - confirm whether
# this can occur in the data.
random.all.qc2022.max <- random.all.qc2022 %>%
  dplyr::group_by(district_code, district_outcome, replicate, size) %>%
  summarize(most_frequent = names(which.max(table(forecast_district))))

# names() yields character codes; convert them back to numbers.
random.all.qc2022.max$most_frequent <- as.numeric(
  random.all.qc2022.max$most_frequent
)

# 1 if the crowd forecast equals the district outcome, 0 otherwise.
random.all.qc2022.max$correct <- as.numeric(
  random.all.qc2022.max$most_frequent == random.all.qc2022.max$district_outcome
)

# Percentage of correct crowd forecasts for each sample size.
random.all.qc2022.wisdom <- random.all.qc2022.max %>%
  dplyr::group_by(size) %>%
  summarise(pct_correct = 100 * mean(correct))

# NOTE(review): absolute path on the author's machine, in a different folder
# from the working directory set at the top of the script.
write_xlsx(
  random.all.qc2022.wisdom,
  "C:/Users/pmongrain/Desktop/main/article_5/weighting/random_all_qc2022_wisdom.xlsx"
)

## DRAW RANDOM SAMPLES: 2011 ONTARIO GENERAL ELECTION

# Sample sizes: 1 to 30
#
# For every sample size k in 1..30, respondents are nested by district and
# each district is resampled with replacement: 10 replicates of k rows
# (rep_sample_n). A numeric column `size` then records k.
# Sizes are handled one after the other in increasing order, which fixes the
# order of the random draws; they are kept as doubles so that `size` has the
# same type as when it was written as a literal (1, 2, ..., 30).

on2011.resamples <- lapply(as.numeric(1:30), function(draw.size) {
  # Built outside mutate() so that the sample size is never looked up among
  # the survey columns by data masking.
  resample.district <- function(district.data) {
    rep_sample_n(district.data, size = draw.size, replace = TRUE, reps = 10)
  }
  merge.all.on2011 %>%
    dplyr::group_by(district_code) %>%
    tidyr::nest() %>%
    mutate(v = map(data, resample.district)) %>%
    unnest(v) %>%
    mutate(size = .env$draw.size)
})
names(on2011.resamples) <- paste0("random.all.on2011.", 1:30)

# Keep random.all.on2011.1 ... random.all.on2011.30 available as individual
# objects in case they are used elsewhere in the script.
invisible(list2env(on2011.resamples, envir = environment()))

# Merge dataframes for varying sample sizes

random.all.on2011 <- do.call(rbind, unname(on2011.resamples))

# Crowd forecast for each district x replicate x sample size: the value of
# forecast_district that occurs most often in the resample.
# which.max() returns the first maximum, so ties go to the value that comes
# first in table()'s sorted order. table() ignores missing forecasts.
# NOTE(review): if a resample contained only missing forecasts, no
# most_frequent value would be produced for that group - confirm whether
# this can occur in the data.
random.all.on2011.max <- random.all.on2011 %>%
  dplyr::group_by(district_code, district_outcome, replicate, size) %>%
  summarize(most_frequent = names(which.max(table(forecast_district))))

# names() yields character codes; convert them back to numbers.
random.all.on2011.max$most_frequent <- as.numeric(
  random.all.on2011.max$most_frequent
)

# 1 if the crowd forecast equals the district outcome, 0 otherwise.
random.all.on2011.max$correct <- as.numeric(
  random.all.on2011.max$most_frequent == random.all.on2011.max$district_outcome
)

# Percentage of correct crowd forecasts for each sample size.
random.all.on2011.wisdom <- random.all.on2011.max %>%
  dplyr::group_by(size) %>%
  summarise(pct_correct = 100 * mean(correct))

# NOTE(review): absolute path on the author's machine, in a different folder
# from the working directory set at the top of the script.
write_xlsx(
  random.all.on2011.wisdom,
  "C:/Users/pmongrain/Desktop/main/article_5/weighting/random_all_on2011_wisdom.xlsx"
)

## DRAW RANDOM SAMPLES: 2014 ONTARIO GENERAL ELECTION

# Sample sizes: 1 to 30
#
# For every sample size k in 1..30, respondents are nested by district and
# each district is resampled with replacement: 10 replicates of k rows
# (rep_sample_n). The result for size k is stored as random.all.on2014.<k>.
# Sizes are handled one after the other in increasing order, which fixes the
# order of the random draws; they are kept as doubles, like the literals
# 1, 2, ..., 30 they replace.

on2014.resamples <- lapply(as.numeric(1:30), function(draw.size) {
  # Built outside mutate() so that the sample size is never looked up among
  # the survey columns by data masking.
  resample.district <- function(district.data) {
    rep_sample_n(district.data, size = draw.size, replace = TRUE, reps = 10)
  }
  merge.all.on2014 %>%
    dplyr::group_by(district_code) %>%
    tidyr::nest() %>%
    mutate(v = map(data, resample.district)) %>%
    unnest(v)
})
names(on2014.resamples) <- paste0("random.all.on2014.", 1:30)

# Export random.all.on2014.1 ... random.all.on2014.30 as individual objects.
invisible(list2env(on2014.resamples, envir = environment()))

# Create variable for sample size and merge dataframes for varying sample sizes
#
# The 30 per-size draws (random.all.on2014.1 ... random.all.on2014.30) are
# collected by name, each one is tagged with its sample size, and the tagged
# versions are written back under their original names so that any code
# referring to them keeps working.

random.all.on2014.by.size <- Map(
  function(draws, n_draw) mutate(draws, size = n_draw),
  mget(paste0("random.all.on2014.", 1:30), envir = environment()),
  as.numeric(1:30)
)

invisible(list2env(random.all.on2014.by.size, envir = environment()))

# bind_rows() rebuilds the grouping of the stacked result; base rbind() on
# grouped tibbles keeps the group index of the first data frame only.

random.all.on2014 <- dplyr::bind_rows(random.all.on2014.by.size)

# Majority forecast within each district x replicate x sample size
#
# NOTE(review): which.max() returns the first maximum, so when two forecast
# values are tied the one that sorts first in table() wins. Ties are most
# likely with small, even sample sizes -- confirm this is the intended rule.
# NOTE(review): no set.seed() call is visible in this section; confirm a seed
# is set upstream if the draws need to be reproducible.

random.all.on2014.max <- random.all.on2014 %>%
  dplyr::group_by(district_code, district_outcome, replicate, size) %>%
  dplyr::summarise(
    most_frequent = names(which.max(table(forecast_district))),
    .groups = "drop_last"
  )

random.all.on2014.max$most_frequent <- as.numeric(random.all.on2014.max$most_frequent)

# 1 when the majority forecast matches the district outcome, 0 otherwise

random.all.on2014.max$correct <- as.numeric(
  random.all.on2014.max$most_frequent == random.all.on2014.max$district_outcome
)

# Percentage of correct majority forecasts by sample size

random.all.on2014.wisdom <- random.all.on2014.max %>%
  dplyr::group_by(size) %>%
  summarise(pct_correct = 100 * mean(correct))

write_xlsx(random.all.on2014.wisdom,
           "C:/Users/pmongrain/Desktop/main/article_5/weighting/random_all_on2014_wisdom.xlsx")

## GRAPH

# Tag each election's accuracy table with an election identifier
# ("1" to "6"; the facet labels of the graph are keyed on these values)

random.all.ca2011.wisdom <- add_column(random.all.ca2011.wisdom, election = "1")
random.all.ca2015.wisdom <- add_column(random.all.ca2015.wisdom, election = "2")
random.all.ca2019.wisdom <- add_column(random.all.ca2019.wisdom, election = "3")
random.all.on2011.wisdom <- add_column(random.all.on2011.wisdom, election = "4")
random.all.on2014.wisdom <- add_column(random.all.on2014.wisdom, election = "5")
random.all.qc2022.wisdom <- add_column(random.all.qc2022.wisdom, election = "6")

# Stack the six elections into one data frame

random.wisdom <- rbind(
  random.all.ca2011.wisdom,
  random.all.ca2015.wisdom,
  random.all.ca2019.wisdom,
  random.all.on2011.wisdom,
  random.all.on2014.wisdom,
  random.all.qc2022.wisdom
)

# Save the stacked results to Excel, then read the saved file back in

write_xlsx(
  random.wisdom,
  path = "C:/Users/pmongrain/Desktop/main/article_5/weighting/random_wisdom.xlsx"
)

random.wisdom <- read_excel(
  path = "C:/Users/pmongrain/Desktop/main/article_5/weighting/random_wisdom.xlsx"
)

# Create graph
#
# One panel per election: smoothed percentage of correctly forecasted
# districts (pct_correct) against the sample size per district (size).
# The figure is written to "random_all.tiff" in the working directory.

tiff("random_all.tiff", units = "in", width = 5, height = 4, res = 300)

# print() is needed for the figure to be drawn when the script is run through
# source(): a ggplot object is only rendered automatically when it is
# evaluated at the console, so without print() the TIFF file is left empty.
#
# NOTE(review): `size` for lines (geom_smooth, element_line) is deprecated in
# favour of `linewidth` from ggplot2 3.4.0; switch once the minimum ggplot2
# version used for replication is confirmed.

print(
  ggplot(random.wisdom, aes(x = size, y = pct_correct)) +
    geom_smooth(size = .5, se = FALSE, colour = "black") +
    facet_wrap(
      vars(election),
      labeller = labeller(election = c("1" = "(a) 2011 Canada Election",
                                       "2" = "(b) 2015 Canada Election",
                                       "3" = "(c) 2019 Canada Election",
                                       "4" = "(d) 2011 Ontario Election",
                                       "5" = "(e) 2014 Ontario Election",
                                       "6" = "(f) 2022 Quebec Election"))
    ) +
    scale_x_continuous("Sample Size Per District", limits = c(0, 30), breaks = seq(0, 30, 5)) +
    scale_y_continuous("Correctly Forecasted Districts (%)", limits = c(50, 100), breaks = seq(50, 100, 10)) +
    theme(panel.background = element_rect(fill = "white", colour = "white"),
          plot.title = element_text(size = 12, hjust = 0.5),
          panel.grid.major = element_line(colour = "#949494", size = 0.4, linetype = 3),
          panel.grid.minor = element_blank(),
          axis.title = element_text(size = 9), axis.text = element_text(size = 8),
          axis.ticks.x = element_blank(), axis.ticks.y = element_blank(),
          axis.title.y = element_text(margin = margin(t = 0, r = 8, b = 0, l = 0)),
          axis.title.x = element_text(margin = margin(t = 8, r = 0, b = 0, l = 0)),
          legend.title = element_text(size = 9), legend.text = element_text(size = 8),
          legend.key.size = unit(.5, 'cm'), strip.text.x = element_text(size = 8),
          strip.background = element_rect(color = NA, fill = "white"), legend.position = "bottom")
)

dev.off()

################################################################################
## FIGURE D5a: SAMPLE SIZES (ALL RESPONDENTS)
################################################################################

# Load data and define the forecast variable as numeric

merge.all <- merge %>%
  mutate(correct_district = as.numeric(correct_district))

# Count observations per election x district (obs), drop rows with a missing
# district code or forecast indicator, keep only districts with at least 60
# observations, and retain the election, district code, district outcome,
# district forecast, and correct forecast variables (plus vote_district).
# The count is taken before the rows with missing values are dropped.

merge.all <- merge.all %>%
  dplyr::group_by(election, district_code) %>%
  add_tally(name = "obs") %>%
  drop_na(district_code, correct_district) %>%
  filter(obs >= 60) %>%
  dplyr::select(election, vote_district, district_code,
                district_outcome, forecast_district, correct_district)

# Make the district identifier unique across elections

merge.all$district_code <- paste(merge.all$district_code, merge.all$election)

# Number of district-election units retained

dplyr::n_distinct(merge.all$district_code)

# Sample sizes: 1 to 30
#
# For every sample size, each district-election unit in merge.all is
# resampled with replacement (10 replicates per unit) and the draws are
# tagged with the sample size. Sizes are processed in increasing order, so
# the random-number stream is consumed in the same order as when each size
# was written out by hand.
#
# NOTE(review): no set.seed() call is visible in this section; confirm a seed
# is set upstream if the draws need to be reproducible.

random.all.by.size <- lapply(as.numeric(1:30), function(n_draw) {
  merge.all %>%
    dplyr::group_by(district_code) %>%
    tidyr::nest() %>%
    mutate(v = map(data, ~ rep_sample_n(., size = n_draw, replace = TRUE, reps = 10))) %>%
    unnest(v) %>%
    mutate(size = n_draw)
})

names(random.all.by.size) <- paste0("random.all.", 1:30)

# Keep the per-size objects (random.all.1 ... random.all.30) available under
# their original names in case other parts of the script refer to them; this
# line can be removed once that is confirmed not to be the case.

invisible(list2env(random.all.by.size, envir = environment()))

# Merge dataframes for varying sample sizes
#
# bind_rows() rebuilds the grouping of the stacked result; base rbind() on
# grouped tibbles keeps the group index of the first data frame only.

random.all <- dplyr::bind_rows(random.all.by.size)

# Majority forecast within each district x replicate x sample size
#
# NOTE(review): which.max() returns the first maximum, so when two forecast
# values are tied the one that sorts first in table() wins. Ties are most
# likely with small, even sample sizes -- confirm this is the intended rule.

random.all.max <- random.all %>%
  dplyr::group_by(district_code, district_outcome, replicate, size) %>%
  dplyr::summarise(
    most_frequent = names(which.max(table(forecast_district))),
    .groups = "drop_last"
  )

random.all.max$most_frequent <- as.numeric(random.all.max$most_frequent)

# 1 when the majority forecast matches the district outcome, 0 otherwise

random.all.max$correct <- as.numeric(
  random.all.max$most_frequent == random.all.max$district_outcome
)

# Percentage of correct majority forecasts by sample size

random.all.wisdom <- random.all.max %>%
  dplyr::group_by(size) %>%
  summarise(pct_correct = 100 * mean(correct))

write_xlsx(random.all.wisdom,
           "C:/Users/pmongrain/Desktop/main/article_5/weighting/random_all_wisdom.xlsx")

################################################################################
## FIGURE D5b: SAMPLE SIZES (LOSERS ONLY)
################################################################################

# Load data, define the forecast variable as numeric, and keep only losers
# (respondents with vote_district equal to "No")

merge.all <- merge %>%
  mutate(correct_district = as.numeric(correct_district)) %>%
  filter(vote_district == "No")

# Count observations per election x district (obs), drop rows with a missing
# district code or forecast indicator, keep only districts with at least 60
# observations, and retain the election, district code, district outcome,
# district forecast, and correct forecast variables (plus vote_district).
# The count is taken before the rows with missing values are dropped.

merge.all <- merge.all %>%
  dplyr::group_by(election, district_code) %>%
  add_tally(name = "obs") %>%
  drop_na(district_code, correct_district) %>%
  filter(obs >= 60) %>%
  dplyr::select(election, vote_district, district_code,
                district_outcome, forecast_district, correct_district)

# Make the district identifier unique across elections

merge.all$district_code <- paste(merge.all$district_code, merge.all$election)

# Number of district-election units retained

dplyr::n_distinct(merge.all$district_code)

# Sample sizes: 1 to 30 (losers only)
#
# For every sample size, each district-election unit in merge.all is
# resampled with replacement (10 replicates per unit) and the draws are
# tagged with the sample size. merge.all has already been restricted to
# vote_district == "No" when the data were loaded for this figure, so the
# filter is not repeated for each draw. Sizes are processed in increasing
# order, so the random-number stream is consumed in the same order as when
# each size was written out by hand.
#
# NOTE(review): no set.seed() call is visible in this section; confirm a seed
# is set upstream if the draws need to be reproducible.

random.all.losers.by.size <- lapply(as.numeric(1:30), function(n_draw) {
  merge.all %>%
    dplyr::group_by(district_code) %>%
    tidyr::nest() %>%
    mutate(v = map(data, ~ rep_sample_n(., size = n_draw, replace = TRUE, reps = 10))) %>%
    unnest(v) %>%
    mutate(size = n_draw)
})

names(random.all.losers.by.size) <- paste0("random.all.losers.", 1:30)

# Keep the per-size objects (random.all.losers.1 ... random.all.losers.30)
# available under their original names in case other parts of the script
# refer to them; this line can be removed once that is confirmed not to be
# the case.

invisible(list2env(random.all.losers.by.size, envir = environment()))

# Merge dataframes for varying sample sizes
#
# The stacked draws are stored in `random.all`, replacing whatever that
# object held before, exactly as this section has always done.
# bind_rows() rebuilds the grouping of the stacked result; base rbind() on
# grouped tibbles keeps the group index of the first data frame only.

random.all <- dplyr::bind_rows(random.all.losers.by.size)

# Majority forecast within each district x replicate x sample size
#
# NOTE(review): which.max() returns the first maximum, so when two forecast
# values are tied the one that sorts first in table() wins. Ties are most
# likely with small, even sample sizes -- confirm this is the intended rule.

random.all.losers.max <- random.all %>%
  dplyr::group_by(district_code, district_outcome, replicate, size) %>%
  dplyr::summarise(
    most_frequent = names(which.max(table(forecast_district))),
    .groups = "drop_last"
  )

random.all.losers.max$most_frequent <- as.numeric(random.all.losers.max$most_frequent)

# 1 when the majority forecast matches the district outcome, 0 otherwise

random.all.losers.max$correct <- as.numeric(
  random.all.losers.max$most_frequent == random.all.losers.max$district_outcome
)

# Percentage of correct majority forecasts by sample size

random.all.losers.wisdom <- random.all.losers.max %>%
  dplyr::group_by(size) %>%
  summarise(pct_correct = 100 * mean(correct))

write_xlsx(random.all.losers.wisdom,
           "C:/Users/pmongrain/Desktop/main/article_5/weighting/random_all_losers_wisdom.xlsx")

################################################################################
## FIGURE D5c: SAMPLE SIZES (WINNERS ONLY)
################################################################################

# Load data, define the forecast variable as numeric, and keep only winners
# (respondents with vote_district equal to "Yes")

merge.all <- merge %>%
  mutate(correct_district = as.numeric(correct_district)) %>%
  filter(vote_district == "Yes")

# Count observations per election x district (obs), drop rows with a missing
# district code or forecast indicator, keep only districts with at least 60
# observations, and retain the election, district code, district outcome,
# district forecast, and correct forecast variables (plus vote_district).
# The count is taken before the rows with missing values are dropped.

merge.all <- merge.all %>%
  dplyr::group_by(election, district_code) %>%
  add_tally(name = "obs") %>%
  drop_na(district_code, correct_district) %>%
  filter(obs >= 60) %>%
  dplyr::select(election, vote_district, district_code,
                district_outcome, forecast_district, correct_district)

# Make the district identifier unique across elections

merge.all$district_code <- paste(merge.all$district_code, merge.all$election)

# Number of district-election units retained

dplyr::n_distinct(merge.all$district_code)

# Sample sizes: 1 to 13
#
# For a given number of respondents per district, resample every
# district-election unit of merge.all (winners only) with replacement
# (10 replicates per unit) and return all draws stacked in one data frame.
# The calls below run in increasing order of sample size, so the
# random-number stream is consumed in the same order as before.

draw_winners_sample <- function(n_draw) {
  merge.all %>%
    filter(vote_district == "Yes") %>%
    dplyr::group_by(district_code) %>%
    tidyr::nest() %>%
    mutate(v = map(data, function(district.rows) {
      rep_sample_n(district.rows, size = n_draw, replace = TRUE, reps = 10)
    })) %>%
    unnest(v)
}

random.all.winners.1 <- draw_winners_sample(1)
random.all.winners.2 <- draw_winners_sample(2)
random.all.winners.3 <- draw_winners_sample(3)
random.all.winners.4 <- draw_winners_sample(4)
random.all.winners.5 <- draw_winners_sample(5)
random.all.winners.6 <- draw_winners_sample(6)
random.all.winners.7 <- draw_winners_sample(7)
random.all.winners.8 <- draw_winners_sample(8)
random.all.winners.9 <- draw_winners_sample(9)
random.all.winners.10 <- draw_winners_sample(10)
random.all.winners.11 <- draw_winners_sample(11)
random.all.winners.12 <- draw_winners_sample(12)
random.all.winners.13 <- draw_winners_sample(13)

# Sample size: 14

random.all.winners.14 <- merge.all %>% filter(vote_district == "Yes") %>% 
  dplyr::group_by(district_code) %>% 
  tidyr::nest() %>% 
  mutate(v=map(data,~rep_sample_n(.,size=14,replace=TRUE,reps=10))) %>% 
  unnest(v)

# Sample size: 15

random.all.winners.15 <- merge.all %>% filter(vote_district == "Yes") %>% 
  dplyr::group_by(district_code) %>% 
  tidyr::nest() %>% 
  mutate(v=map(data,~rep_sample_n(.,size=15,replace=TRUE,reps=10))) %>% 
  unnest(v)

# Sample size: 16

random.all.winners.16 <- merge.all %>% filter(vote_district == "Yes") %>% 
  dplyr::group_by(district_code) %>% 
  tidyr::nest() %>% 
  mutate(v=map(data,~rep_sample_n(.,size=16,replace=TRUE,reps=10))) %>% 
  unnest(v)

# Sample size: 17

random.all.winners.17 <- merge.all %>% filter(vote_district == "Yes") %>% 
  dplyr::group_by(district_code) %>% 
  tidyr::nest() %>% 
  mutate(v=map(data,~rep_sample_n(.,size=17,replace=TRUE,reps=10))) %>% 
  unnest(v)

# Sample size: 18

random.all.winners.18 <- merge.all %>% filter(vote_district == "Yes") %>% 
  dplyr::group_by(district_code) %>% 
  tidyr::nest() %>% 
  mutate(v=map(data,~rep_sample_n(.,size=18,replace=TRUE,reps=10))) %>% 
  unnest(v)

# Sample size: 19

random.all.winners.19 <- merge.all %>% filter(vote_district == "Yes") %>% 
  dplyr::group_by(district_code) %>% 
  tidyr::nest() %>% 
  mutate(v=map(data,~rep_sample_n(.,size=19,replace=TRUE,reps=10))) %>% 
  unnest(v)

# Sample size: 20

random.all.winners.20 <- merge.all %>% filter(vote_district == "Yes") %>% 
  dplyr::group_by(district_code) %>% 
  tidyr::nest() %>% 
  mutate(v=map(data,~rep_sample_n(.,size=20,replace=TRUE,reps=10))) %>% 
  unnest(v)

# Sample size: 21

random.all.winners.21 <- merge.all %>% filter(vote_district == "Yes") %>% 
  dplyr::group_by(district_code) %>% 
  tidyr::nest() %>% 
  mutate(v=map(data,~rep_sample_n(.,size=21,replace=TRUE,reps=10))) %>% 
  unnest(v)

# Sample size: 22

random.all.winners.22 <- merge.all %>% filter(vote_district == "Yes") %>% 
  dplyr::group_by(district_code) %>% 
  tidyr::nest() %>% 
  mutate(v=map(data,~rep_sample_n(.,size=22,replace=TRUE,reps=10))) %>% 
  unnest(v)

# Sample size: 23

random.all.winners.23 <- merge.all %>% filter(vote_district == "Yes") %>% 
  dplyr::group_by(district_code) %>% 
  tidyr::nest() %>% 
  mutate(v=map(data,~rep_sample_n(.,size=23,replace=TRUE,reps=10))) %>% 
  unnest(v)

# Sample size: 24

random.all.winners.24 <- merge.all %>% filter(vote_district == "Yes") %>% 
  dplyr::group_by(district_code) %>% 
  tidyr::nest() %>% 
  mutate(v=map(data,~rep_sample_n(.,size=24,replace=TRUE,reps=10))) %>% 
  unnest(v)

# Sample size: 25

random.all.winners.25 <- merge.all %>% filter(vote_district == "Yes") %>% 
  dplyr::group_by(district_code) %>% 
  tidyr::nest() %>% 
  mutate(v=map(data,~rep_sample_n(.,size=25,replace=TRUE,reps=10))) %>% 
  unnest(v)

# Sample size: 26

random.all.winners.26 <- merge.all %>% filter(vote_district == "Yes") %>% 
  dplyr::group_by(district_code) %>% 
  tidyr::nest() %>% 
  mutate(v=map(data,~rep_sample_n(.,size=26,replace=TRUE,reps=10))) %>% 
  unnest(v)

# Sample size: 27

random.all.winners.27 <- merge.all %>% filter(vote_district == "Yes") %>% 
  dplyr::group_by(district_code) %>% 
  tidyr::nest() %>% 
  mutate(v=map(data,~rep_sample_n(.,size=27,replace=TRUE,reps=10))) %>% 
  unnest(v)

# Sample size: 28

random.all.winners.28 <- merge.all %>% filter(vote_district == "Yes") %>% 
  dplyr::group_by(district_code) %>% 
  tidyr::nest() %>% 
  mutate(v=map(data,~rep_sample_n(.,size=28,replace=TRUE,reps=10))) %>% 
  unnest(v)

# Sample size: 29

random.all.winners.29 <- merge.all %>% filter(vote_district == "Yes") %>% 
  dplyr::group_by(district_code) %>% 
  tidyr::nest() %>% 
  mutate(v=map(data,~rep_sample_n(.,size=29,replace=TRUE,reps=10))) %>% 
  unnest(v)

# Sample size: 30

random.all.winners.30 <- merge.all %>% filter(vote_district == "Yes") %>% 
  dplyr::group_by(district_code) %>% 
  tidyr::nest() %>% 
  mutate(v=map(data,~rep_sample_n(.,size=30,replace=TRUE,reps=10))) %>% 
  unnest(v)

# Create variable for sample size

random.all.winners.1 <- random.all.winners.1 %>% mutate(size = 1)
random.all.winners.2 <- random.all.winners.2 %>% mutate(size = 2)
random.all.winners.3 <- random.all.winners.3 %>% mutate(size = 3)
random.all.winners.4 <- random.all.winners.4 %>% mutate(size = 4)
random.all.winners.5 <- random.all.winners.5 %>% mutate(size = 5)
random.all.winners.6 <- random.all.winners.6 %>% mutate(size = 6)
random.all.winners.7 <- random.all.winners.7 %>% mutate(size = 7)
random.all.winners.8 <- random.all.winners.8 %>% mutate(size = 8)
random.all.winners.9 <- random.all.winners.9 %>% mutate(size = 9)
random.all.winners.10 <- random.all.winners.10 %>% mutate(size = 10)
random.all.winners.11 <- random.all.winners.11 %>% mutate(size = 11)
random.all.winners.12 <- random.all.winners.12 %>% mutate(size = 12)
random.all.winners.13 <- random.all.winners.13 %>% mutate(size = 13)
random.all.winners.14 <- random.all.winners.14 %>% mutate(size = 14)
random.all.winners.15 <- random.all.winners.15 %>% mutate(size = 15)
random.all.winners.16 <- random.all.winners.16 %>% mutate(size = 16)
random.all.winners.17 <- random.all.winners.17 %>% mutate(size = 17)
random.all.winners.18 <- random.all.winners.18 %>% mutate(size = 18)
random.all.winners.19 <- random.all.winners.19 %>% mutate(size = 19)
random.all.winners.20 <- random.all.winners.20 %>% mutate(size = 20)
random.all.winners.21 <- random.all.winners.21 %>% mutate(size = 21)
random.all.winners.22 <- random.all.winners.22 %>% mutate(size = 22)
random.all.winners.23 <- random.all.winners.23 %>% mutate(size = 23)
random.all.winners.24 <- random.all.winners.24 %>% mutate(size = 24)
random.all.winners.25 <- random.all.winners.25 %>% mutate(size = 25)
random.all.winners.26 <- random.all.winners.26 %>% mutate(size = 26)
random.all.winners.27 <- random.all.winners.27 %>% mutate(size = 27)
random.all.winners.28 <- random.all.winners.28 %>% mutate(size = 28)
random.all.winners.29 <- random.all.winners.29 %>% mutate(size = 29)
random.all.winners.30 <- random.all.winners.30 %>% mutate(size = 30)

# Merge dataframes for varying sample sizes

random.all <- rbind(random.all.winners.1,random.all.winners.2,random.all.winners.3,
                    random.all.winners.4,random.all.winners.5,random.all.winners.6,
                    random.all.winners.7,random.all.winners.8,random.all.winners.9,
                    random.all.winners.10,random.all.winners.11,random.all.winners.12,
                    random.all.winners.13,random.all.winners.14,random.all.winners.15,
                    random.all.winners.16,random.all.winners.17,random.all.winners.18,
                    random.all.winners.19,random.all.winners.20,random.all.winners.21,
                    random.all.winners.22,random.all.winners.23,random.all.winners.24,
                    random.all.winners.25,random.all.winners.26,random.all.winners.27,
                    random.all.winners.28,random.all.winners.29,random.all.winners.30)

# Most frequent district forecast within each district x outcome x replicate x
# sample-size cell. which.max() returns the first maximum of the table, so a
# tie goes to the forecast category that table() lists first.

random.all.winners.max <- random.all %>%
  dplyr::group_by(district_code, district_outcome, replicate, size) %>%
  summarize(most_frequent = names(which.max(table(forecast_district))))

random.all.winners.max$most_frequent <- as.numeric(random.all.winners.max$most_frequent)

# 1 when the most frequent forecast equals the district outcome, 0 otherwise

random.all.winners.max$correct <- as.numeric(
  random.all.winners.max$most_frequent == random.all.winners.max$district_outcome
)

# Percentage of correct majority forecasts for each sample size

random.all.winners.wisdom <- random.all.winners.max %>%
  dplyr::group_by(size) %>%
  summarise(pct_correct = mean(correct) * 100)

write_xlsx(
  random.all.winners.wisdom,
  "C:/Users/pmongrain/Desktop/main/article_5/weighting/random_all_winners_wisdom.xlsx"
)

## GRAPH

# Create the panel identifier (stored in a column named `election`):
# "1" = all respondents, "2" = losers only, "3" = winners only.
# mutate() is used rather than add_column() so that re-running this section
# overwrites the column instead of failing because it already exists.

random.all.wisdom <- random.all.wisdom %>% mutate(election = "1")
random.all.losers.wisdom <- random.all.losers.wisdom %>% mutate(election = "2")
random.all.winners.wisdom <- random.all.winners.wisdom %>% mutate(election = "3")

# Merge dataframes

random.overall.wisdom <- rbind(random.all.wisdom, random.all.losers.wisdom, 
                               random.all.winners.wisdom)

# Save to Excel

write_xlsx(random.overall.wisdom,
           "C:/Users/pmongrain/Desktop/main/article_5/weighting/random_overall_wisdom.xlsx")

# Open dataframe

random.overall.wisdom <- read_excel("C:/Users/pmongrain/Desktop/main/article_5/weighting/random_overall_wisdom.xlsx")

# Create graph: smoothed share of correctly forecasted districts by sample
# size, one panel per respondent group. Line widths use `linewidth`, which
# replaced `size` for lines in ggplot2 3.4.0.

random.overall.g <- ggplot(random.overall.wisdom, aes(x = size, y = pct_correct)) + 
  geom_smooth(linewidth = .5, se = FALSE, colour = "black") +
  facet_wrap(vars(election), labeller = labeller(election = c("1" = "(a) All Respondents", 
                                                              "2" = "(b) Losers Only",
                                                              "3" = "(c) Winners Only"))) +
  scale_x_continuous("Sample Size Per District", limits = c(0, 30), breaks = seq(0, 30, 5)) +
  scale_y_continuous("Correctly Forecasted Districts (%)", limits = c(40, 100), breaks = seq(40, 100, 10)) +
  theme(panel.background = element_rect(fill = "white", colour = "white"),
        plot.title = element_text(size = 12, hjust = 0.5), 
        panel.grid.major = element_line(colour = "#949494", linewidth = 0.4, linetype = 3), 
        panel.grid.minor = element_blank(),
        axis.title = element_text(size = 9), axis.text = element_text(size = 8),
        axis.ticks.x = element_blank(), axis.ticks.y = element_blank(),
        axis.title.y = element_text(margin = margin(t = 0, r = 8, b = 0, l = 0)),
        axis.title.x = element_text(margin = margin(t = 8, r = 0, b = 0, l = 0)),
        legend.title = element_text(size = 9), legend.text = element_text(size = 8), 
        legend.key.size = unit(.5, 'cm'), strip.text.x = element_text(size = 9),
        strip.background = element_rect(color = NA, fill = "white"), legend.position = "bottom")

# Open the device only once the plot object exists, and print explicitly:
# a bare ggplot call is not drawn when the script is run through source().

tiff("random_overall.tiff", units = "in", width = 7, height = 3, res = 1200)

print(random.overall.g)

dev.off()

################################################################################
## FIGURE 2: WISDOM OF CROWDS
################################################################################

# Load data

woc <- sjlabelled::read_stata("data/woc.dta")

woc$id <- as.factor(woc$id)

# Sets the default ggplot2 theme for this and all later plots in the session

theme_set(theme_minimal())

# Graph: dodged bars of the percentage of correct forecasts, one pair of bars
# per value of `id`, filled by forecast level

woc.g <- ggplot(data = woc, aes(x = id, y = percent, fill = factor(level))) +
  geom_bar(stat = "identity", width = 0.8,
           position = position_dodge(preserve = "single")) +
  scale_x_discrete("", labels = c("1" = "CA 2011", "2" = "CA 2015",
                                  "3" = "CA 2019", "4" = "ON 2011",
                                  "5" = "ON 2014", "6" = "QC 2022",
                                  "7" = "All")) +
  scale_y_continuous("% Correct Forecasts", limits = c(0, 100),
                     breaks = seq(0, 100, 20)) +
  theme(legend.title = element_text(size = 10),
        legend.position = "bottom", legend.key.size = unit(.6, 'line'),
        strip.text.x = element_text(size = 12)) +
  scale_fill_manual(name = "Forecast Level",
                    labels = c("Individual", "District"),
                    values = c("black", "grey"))

# Open the device only once the plot object exists, and print explicitly:
# a bare `woc.g` is not drawn when the script is run through source().

tiff("woc.tiff", units = "in", width = 5, height = 5, res = 300)

print(woc.g)

dev.off()

################################################################################
## FIGURE C1: VOTE INTENTION POLLS - CANADA 2011
################################################################################

ca2011 <- read_dta("polls/ca2011_polls.dta")

# Party key -> party colour

colors <- c("lib" = "#d92026", "con" = "#0f2d52", "new" = "#f58320", 
            "bloc" = "#00a5ec", "grn" = "#3d953b")

# Individual polls (semi-transparent points)

pc2011 <- ggplot(ca2011, aes(x = time)) + 
  geom_point(aes(y = lpc_int), color = colors[["lib"]], alpha = 1/4) + 
  geom_point(aes(y = cpc_int), color = colors[["con"]], alpha = 1/4) +
  geom_point(aes(y = ndp_int), color = colors[["new"]], alpha = 1/4) +
  geom_point(aes(y = bq_int), color = colors[["bloc"]], alpha = 1/4) +
  geom_point(aes(y = green_int), color = colors[["grn"]], alpha = 1/4)

# Loess trend per party and legend. The legend labels are matched to the
# party keys by name, so they do not depend on how the keys happen to sort.

pc2011 <- pc2011 + 
  geom_smooth(aes(y = lpc_int, color = "lib"), se = FALSE, method = loess, span = 0.1, linewidth = 1) + 
  geom_smooth(aes(y = cpc_int, color = "con"), se = FALSE, method = loess, span = 0.1, linewidth = 1) + 
  geom_smooth(aes(y = ndp_int, color = "new"), se = FALSE, method = loess, span = 0.1, linewidth = 1) + 
  geom_smooth(aes(y = bq_int, color = "bloc"), se = FALSE, method = loess, span = 0.1, linewidth = 1) + 
  geom_smooth(aes(y = green_int, color = "grn"), se = FALSE, method = loess, span = 0.1, linewidth = 1) +
  labs(color = "") +
  scale_color_manual(labels = c("bloc" = "BQ", "con" = "CPC", "grn" = "GPC", 
                                "lib" = "LPC", "new" = "NDP"), 
                     values = colors) +
  guides(color = guide_legend(nrow = 2, byrow = TRUE)) + 
  theme(legend.position = c(.3, 1.02), legend.justification = c("right", "top"),
        legend.box.just = "right", legend.margin = margin(0, 0, 0, 0),
        legend.background = element_rect(fill = NA), legend.text = element_text(size = 7),
        legend.key.size = unit(.4, 'cm'))

# Panel styling and axes (the axis titles are set by the scales)

pc2011 <- pc2011 + 
  ggtitle("") + 
  theme(panel.background = element_rect(fill = "white", colour = "white"),
        plot.title = element_text(size = 12, hjust = 0.5), panel.grid.major = element_blank(), 
        panel.grid.minor = element_blank(), panel.border = element_rect(colour = "black", fill = NA, linewidth = 1),
        axis.title = element_text(size = 9.5), axis.text = element_text(size = 9.5),
        axis.ticks.x = element_line(linewidth = .5), axis.ticks.y = element_line(linewidth = .5),
        axis.title.y = element_text(margin = margin(t = 0, r = 8, b = 0, l = 0)),
        axis.title.x = element_text(margin = margin(t = 8, r = 0, b = 0, l = 0))) + 
  scale_x_reverse("Number of Days Before the Election", limits = c(40, 0), breaks = seq(0, 40, 10), expand = expansion(mult = c(0.02, 0.03))) +
  scale_y_continuous("Percentage", limits = c(0, 50), breaks = seq(0, 50, 10), expand = expansion(mult = c(0.03, 0.03)))

# Markers at x = 0 (a dot inside a hollow diamond per party) and dashed
# vertical reference lines. annotate() draws each mark once; a geom with
# constant aes() values would be redrawn for every row of the poll data.
# NOTE(review): the y values are presumably the parties' election-day vote
# shares and x = 4 presumably a survey date - confirm.

pc2011 <- pc2011 + 
  annotate("point", x = 0, y = 18.91, colour = colors[["lib"]], size = 2) +
  annotate("point", x = 0, y = 18.91, shape = 5, colour = colors[["lib"]], size = 4, stroke = 1) +
  annotate("point", x = 0, y = 39.62, colour = colors[["con"]], size = 2) +
  annotate("point", x = 0, y = 39.62, shape = 5, colour = colors[["con"]], size = 4, stroke = 1) +
  annotate("point", x = 0, y = 30.63, colour = colors[["new"]], size = 2) +
  annotate("point", x = 0, y = 30.63, shape = 5, colour = colors[["new"]], size = 4, stroke = 1) +
  annotate("point", x = 0, y = 6.04, colour = colors[["bloc"]], size = 2) +
  annotate("point", x = 0, y = 6.04, shape = 5, colour = colors[["bloc"]], size = 4, stroke = 1) +
  annotate("point", x = 0, y = 3.91, colour = colors[["grn"]], size = 2) +
  annotate("point", x = 0, y = 3.91, shape = 5, colour = colors[["grn"]], size = 4, stroke = 1) +
  annotate("segment", x = 0, y = 0, xend = 0, yend = 50, linetype = 2) +
  annotate("segment", x = 4, y = 0, xend = 4, yend = 50, linetype = 2)

# Render the finished plot once; print() is explicit so that the figure is
# also written when the script is run through source().

tiff("ca2011.tiff", units = "in", width = 6, height = 5, res = 300)

print(pc2011)

dev.off()

################################################################################
## FIGURE C2: VOTE INTENTION POLLS - CANADA 2015
################################################################################

ca2015 <- read_dta("polls/ca2015_polls.dta")

# Party key -> party colour

colors <- c("lib" = "#d92026", "con" = "#0f2d52", "new" = "#f58320", 
            "bloc" = "#00a5ec", "grn" = "#3d953b")

# Individual polls (semi-transparent points)

pc2015 <- ggplot(ca2015, aes(x = time)) + 
  geom_point(aes(y = lpc_int), color = colors[["lib"]], alpha = 1/4) + 
  geom_point(aes(y = cpc_int), color = colors[["con"]], alpha = 1/4) +
  geom_point(aes(y = ndp_int), color = colors[["new"]], alpha = 1/4) +
  geom_point(aes(y = bq_int), color = colors[["bloc"]], alpha = 1/4) +
  geom_point(aes(y = green_int), color = colors[["grn"]], alpha = 1/4)

# Loess trend per party and legend. The legend labels are matched to the
# party keys by name, so they do not depend on how the keys happen to sort.

pc2015 <- pc2015 + 
  geom_smooth(aes(y = lpc_int, color = "lib"), se = FALSE, method = loess, span = 0.1, linewidth = 1) + 
  geom_smooth(aes(y = cpc_int, color = "con"), se = FALSE, method = loess, span = 0.1, linewidth = 1) + 
  geom_smooth(aes(y = ndp_int, color = "new"), se = FALSE, method = loess, span = 0.1, linewidth = 1) + 
  geom_smooth(aes(y = bq_int, color = "bloc"), se = FALSE, method = loess, span = 0.1, linewidth = 1) + 
  geom_smooth(aes(y = green_int, color = "grn"), se = FALSE, method = loess, span = 0.1, linewidth = 1) +
  labs(color = "") +
  scale_color_manual(labels = c("bloc" = "BQ", "con" = "CPC", "grn" = "GPC", 
                                "lib" = "LPC", "new" = "NDP"), 
                     values = colors) +
  guides(color = guide_legend(nrow = 2, byrow = TRUE)) + 
  theme(legend.position = c(.94, 1.02), legend.justification = c("right", "top"),
        legend.box.just = "right", legend.margin = margin(0, 0, 0, 0),
        legend.background = element_rect(fill = NA), legend.text = element_text(size = 7),
        legend.key.size = unit(.4, 'cm'))

# Panel styling and axes (the axis titles are set by the scales)

pc2015 <- pc2015 + 
  ggtitle("") + 
  theme(panel.background = element_rect(fill = "white", colour = "white"),
        plot.title = element_text(size = 12, hjust = 0.5), panel.grid.major = element_blank(), 
        panel.grid.minor = element_blank(), panel.border = element_rect(colour = "black", fill = NA, linewidth = 1),
        axis.title = element_text(size = 9.5), axis.text = element_text(size = 9.5),
        axis.ticks.x = element_line(linewidth = .5), axis.ticks.y = element_line(linewidth = .5),
        axis.title.y = element_text(margin = margin(t = 0, r = 8, b = 0, l = 0)),
        axis.title.x = element_text(margin = margin(t = 8, r = 0, b = 0, l = 0))) + 
  scale_x_reverse("Number of Days Before the Election", limits = c(80, 0), breaks = seq(0, 80, 10), expand = expansion(mult = c(0.02, 0.03))) +
  scale_y_continuous("Percentage", limits = c(0, 50), breaks = seq(0, 50, 10), expand = expansion(mult = c(0.03, 0.03)))

# Markers at x = 0 (a dot inside a hollow diamond per party) and dashed
# vertical reference lines. annotate() draws each mark once; a geom with
# constant aes() values would be redrawn for every row of the poll data.
# NOTE(review): the y values are presumably the parties' election-day vote
# shares and x = 54 presumably a survey date - confirm.

pc2015 <- pc2015 + 
  annotate("point", x = 0, y = 39.5, colour = colors[["lib"]], size = 2) +
  annotate("point", x = 0, y = 39.5, shape = 5, colour = colors[["lib"]], size = 4, stroke = 1) +
  annotate("point", x = 0, y = 31.9, colour = colors[["con"]], size = 2) +
  annotate("point", x = 0, y = 31.9, shape = 5, colour = colors[["con"]], size = 4, stroke = 1) +
  annotate("point", x = 0, y = 19.7, colour = colors[["new"]], size = 2) +
  annotate("point", x = 0, y = 19.7, shape = 5, colour = colors[["new"]], size = 4, stroke = 1) +
  annotate("point", x = 0, y = 4.7, colour = colors[["bloc"]], size = 2) +
  annotate("point", x = 0, y = 4.7, shape = 5, colour = colors[["bloc"]], size = 4, stroke = 1) +
  annotate("point", x = 0, y = 3.4, colour = colors[["grn"]], size = 2) +
  annotate("point", x = 0, y = 3.4, shape = 5, colour = colors[["grn"]], size = 4, stroke = 1) +
  annotate("segment", x = 0, y = 0, xend = 0, yend = 50, linetype = 2) +
  annotate("segment", x = 54, y = 0, xend = 54, yend = 50, linetype = 2)

# Render the finished plot once; print() is explicit so that the figure is
# also written when the script is run through source().

tiff("ca2015.tiff", units = "in", width = 6, height = 5, res = 300)

print(pc2015)

dev.off()

################################################################################
## FIGURE C3: VOTE INTENTION POLLS - CANADA 2019
################################################################################

ca2019 <- read_dta("polls/ca2019_polls.dta")

# Party key -> party colour

colors <- c("lib" = "#d92026", "con" = "#0f2d52", "new" = "#f58320", 
            "bloc" = "#00a5ec", "grn" = "#3d953b", "people" = "#442D7B")

# Individual polls (semi-transparent points)

pc2019 <- ggplot(ca2019, aes(x = time)) + 
  geom_point(aes(y = lpc), color = colors[["lib"]], alpha = 1/4) + 
  geom_point(aes(y = cpc), color = colors[["con"]], alpha = 1/4) +
  geom_point(aes(y = ndp), color = colors[["new"]], alpha = 1/4) +
  geom_point(aes(y = bq), color = colors[["bloc"]], alpha = 1/4) +
  geom_point(aes(y = gpc), color = colors[["grn"]], alpha = 1/4) +
  geom_point(aes(y = ppc), color = colors[["people"]], alpha = 1/4)

# Loess trend per party and legend. The legend labels are matched to the
# party keys by name, so they do not depend on how the keys happen to sort.

pc2019 <- pc2019 + 
  geom_smooth(aes(y = lpc, color = "lib"), se = FALSE, method = loess, span = 0.1, linewidth = 1) + 
  geom_smooth(aes(y = cpc, color = "con"), se = FALSE, method = loess, span = 0.1, linewidth = 1) + 
  geom_smooth(aes(y = ndp, color = "new"), se = FALSE, method = loess, span = 0.1, linewidth = 1) + 
  geom_smooth(aes(y = bq, color = "bloc"), se = FALSE, method = loess, span = 0.1, linewidth = 1) + 
  geom_smooth(aes(y = gpc, color = "grn"), se = FALSE, method = loess, span = 0.1, linewidth = 1) +
  geom_smooth(aes(y = ppc, color = "people"), se = FALSE, method = loess, span = 0.1, linewidth = 1) +
  labs(color = "") +
  scale_color_manual(labels = c("bloc" = "BQ", "con" = "CPC", "grn" = "GPC", 
                                "lib" = "LPC", "new" = "NDP", "people" = "PPC"), 
                     values = colors) +
  guides(color = guide_legend(nrow = 2, byrow = TRUE)) + 
  theme(legend.position = c(.94, 1.02), legend.justification = c("right", "top"),
        legend.box.just = "right", legend.margin = margin(0, 0, 0, 0),
        legend.background = element_rect(fill = NA), legend.text = element_text(size = 7),
        legend.key.size = unit(.4, 'cm'))

# Panel styling and axes (the axis titles are set by the scales)

pc2019 <- pc2019 + 
  ggtitle("") + 
  theme(panel.background = element_rect(fill = "white", colour = "white"),
        plot.title = element_text(size = 12, hjust = 0.5), panel.grid.major = element_blank(), 
        panel.grid.minor = element_blank(), panel.border = element_rect(colour = "black", fill = NA, linewidth = 1),
        axis.title = element_text(size = 9.5), axis.text = element_text(size = 9.5),
        axis.ticks.x = element_line(linewidth = .5), axis.ticks.y = element_line(linewidth = .5),
        axis.title.y = element_text(margin = margin(t = 0, r = 8, b = 0, l = 0)),
        axis.title.x = element_text(margin = margin(t = 8, r = 0, b = 0, l = 0))) + 
  scale_x_reverse("Number of Days Before the Election", limits = c(41, 0), breaks = seq(0, 41, 10), expand = expansion(mult = c(0.02, 0.03))) +
  scale_y_continuous("Percentage", limits = c(0, 50), breaks = seq(0, 50, 10), expand = expansion(mult = c(0.03, 0.03)))

# Markers at x = 0 (a dot inside a hollow diamond per party) and dashed
# vertical reference lines. annotate() draws each mark once; a geom with
# constant aes() values would be redrawn for every row of the poll data.
# NOTE(review): the y values are presumably the parties' election-day vote
# shares and x = 41 presumably a survey date - confirm.

pc2019 <- pc2019 + 
  annotate("point", x = 0, y = 33.1, colour = colors[["lib"]], size = 2) +
  annotate("point", x = 0, y = 33.1, shape = 5, colour = colors[["lib"]], size = 4, stroke = 1) +
  annotate("point", x = 0, y = 34.3, colour = colors[["con"]], size = 2) +
  annotate("point", x = 0, y = 34.3, shape = 5, colour = colors[["con"]], size = 4, stroke = 1) +
  annotate("point", x = 0, y = 16.0, colour = colors[["new"]], size = 2) +
  annotate("point", x = 0, y = 16.0, shape = 5, colour = colors[["new"]], size = 4, stroke = 1) +
  annotate("point", x = 0, y = 7.6, colour = colors[["bloc"]], size = 2) +
  annotate("point", x = 0, y = 7.6, shape = 5, colour = colors[["bloc"]], size = 4, stroke = 1) +
  annotate("point", x = 0, y = 6.6, colour = colors[["grn"]], size = 2) +
  annotate("point", x = 0, y = 6.6, shape = 5, colour = colors[["grn"]], size = 4, stroke = 1) +
  annotate("point", x = 0, y = 1.6, colour = colors[["people"]], size = 2) +
  annotate("point", x = 0, y = 1.6, shape = 5, colour = colors[["people"]], size = 4, stroke = 1) +
  annotate("segment", x = 0, y = 0, xend = 0, yend = 50, linetype = 2) +
  annotate("segment", x = 41, y = 0, xend = 41, yend = 50, linetype = 2)

# Render the finished plot once; print() is explicit so that the figure is
# also written when the script is run through source().

tiff("ca2019.tiff", units = "in", width = 6, height = 5, res = 300)

print(pc2019)

dev.off()

################################################################################
## FIGURE C4: VOTE INTENTION POLLS - ONTARIO 2011
################################################################################

on2011 <- read_dta("polls/on2011_polls.dta")

# Party key -> party colour

colors <- c("olib" = "#ec1c38", "ocon" = "#245fac", "onew" = "#f99d1c", 
            "ogrn" = "#307730")

# Individual polls (semi-transparent points)

po2011 <- ggplot(on2011, aes(x = time)) + 
  geom_point(aes(y = liberal), color = colors[["olib"]], alpha = 1/4) + 
  geom_point(aes(y = pc), color = colors[["ocon"]], alpha = 1/4) +
  geom_point(aes(y = ndp), color = colors[["onew"]], alpha = 1/4) +
  geom_point(aes(y = green), color = colors[["ogrn"]], alpha = 1/4)

# Loess trend per party and legend. The legend labels are matched to the
# party keys by name, so they do not depend on how the keys happen to sort.

po2011 <- po2011 + 
  geom_smooth(aes(y = liberal, color = "olib"), se = FALSE, method = loess, span = 0.1, linewidth = 1) + 
  geom_smooth(aes(y = pc, color = "ocon"), se = FALSE, method = loess, span = 0.1, linewidth = 1) + 
  geom_smooth(aes(y = ndp, color = "onew"), se = FALSE, method = loess, span = 0.1, linewidth = 1) + 
  geom_smooth(aes(y = green, color = "ogrn"), se = FALSE, method = loess, span = 0.1, linewidth = 1) +
  labs(color = "") +
  scale_color_manual(labels = c("ocon" = "OPC", "ogrn" = "GPO", "olib" = "OLP", 
                                "onew" = "ONDP"), 
                     values = colors) +
  guides(color = guide_legend(nrow = 2, byrow = TRUE)) + 
  theme(legend.position = c(.22, 1.02), legend.justification = c("right", "top"),
        legend.box.just = "right", legend.margin = margin(0, 0, 0, 0),
        legend.background = element_rect(fill = NA), legend.text = element_text(size = 7),
        legend.key.size = unit(.4, 'cm'))

# Panel styling and axes (the axis titles are set by the scales)

po2011 <- po2011 + 
  ggtitle("") + 
  theme(panel.background = element_rect(fill = "white", colour = "white"),
        plot.title = element_text(size = 12, hjust = 0.5), panel.grid.major = element_blank(), 
        panel.grid.minor = element_blank(), panel.border = element_rect(colour = "black", fill = NA, linewidth = 1),
        axis.title = element_text(size = 9.5), axis.text = element_text(size = 9.5),
        axis.ticks.x = element_line(linewidth = .5), axis.ticks.y = element_line(linewidth = .5),
        axis.title.y = element_text(margin = margin(t = 0, r = 8, b = 0, l = 0)),
        axis.title.x = element_text(margin = margin(t = 8, r = 0, b = 0, l = 0))) + 
  scale_x_reverse("Number of Days Before the Election", limits = c(40, 0), breaks = seq(0, 40, 10), expand = expansion(mult = c(0.02, 0.03))) +
  scale_y_continuous("Percentage", limits = c(0, 50), breaks = seq(0, 50, 10), expand = expansion(mult = c(0.03, 0.03)))

# Markers at x = 0 (a dot inside a hollow diamond per party) and dashed
# vertical reference lines. annotate() draws each mark once; a geom with
# constant aes() values would be redrawn for every row of the poll data.
# NOTE(review): the y values are presumably the parties' election-day vote
# shares and x = 3 presumably a survey date - confirm.

po2011 <- po2011 + 
  annotate("point", x = 0, y = 37.65, colour = colors[["olib"]], size = 2) +
  annotate("point", x = 0, y = 37.65, shape = 5, colour = colors[["olib"]], size = 4, stroke = 1) +
  annotate("point", x = 0, y = 35.45, colour = colors[["ocon"]], size = 2) +
  annotate("point", x = 0, y = 35.45, shape = 5, colour = colors[["ocon"]], size = 4, stroke = 1) +
  annotate("point", x = 0, y = 22.74, colour = colors[["onew"]], size = 2) +
  annotate("point", x = 0, y = 22.74, shape = 5, colour = colors[["onew"]], size = 4, stroke = 1) +
  annotate("point", x = 0, y = 2.92, colour = colors[["ogrn"]], size = 2) +
  annotate("point", x = 0, y = 2.92, shape = 5, colour = colors[["ogrn"]], size = 4, stroke = 1) +
  annotate("segment", x = 0, y = 0, xend = 0, yend = 50, linetype = 2) +
  annotate("segment", x = 3, y = 0, xend = 3, yend = 50, linetype = 2)

# Render the finished plot once; print() is explicit so that the figure is
# also written when the script is run through source().

tiff("on2011.tiff", units = "in", width = 6, height = 5, res = 300)

print(po2011)

dev.off()

################################################################################
## FIGURE C5: VOTE INTENTION POLLS - ONTARIO 2014
################################################################################

on2014 <- read_dta("polls/on2014_polls.dta")

# Party key -> party colour

colors <- c("olib" = "#ec1c38", "ocon" = "#245fac", "onew" = "#f99d1c", 
            "ogrn" = "#307730")

# Individual polls (semi-transparent points)

po2014 <- ggplot(on2014, aes(x = time)) + 
  geom_point(aes(y = liberal), color = colors[["olib"]], alpha = 1/4) + 
  geom_point(aes(y = pc), color = colors[["ocon"]], alpha = 1/4) +
  geom_point(aes(y = ndp), color = colors[["onew"]], alpha = 1/4) +
  geom_point(aes(y = green), color = colors[["ogrn"]], alpha = 1/4)

# Loess trend per party and legend. The legend labels are matched to the
# party keys by name, so they do not depend on how the keys happen to sort.

po2014 <- po2014 + 
  geom_smooth(aes(y = liberal, color = "olib"), se = FALSE, method = loess, span = 0.1, linewidth = 1) + 
  geom_smooth(aes(y = pc, color = "ocon"), se = FALSE, method = loess, span = 0.1, linewidth = 1) + 
  geom_smooth(aes(y = ndp, color = "onew"), se = FALSE, method = loess, span = 0.1, linewidth = 1) + 
  geom_smooth(aes(y = green, color = "ogrn"), se = FALSE, method = loess, span = 0.1, linewidth = 1) +
  labs(color = "") +
  scale_color_manual(labels = c("ocon" = "OPC", "ogrn" = "GPO", "olib" = "OLP", 
                                "onew" = "ONDP"), 
                     values = colors) +
  guides(color = guide_legend(nrow = 2, byrow = TRUE)) + 
  theme(legend.position = c(.94, 1.02), legend.justification = c("right", "top"),
        legend.box.just = "right", legend.margin = margin(0, 0, 0, 0),
        legend.background = element_rect(fill = NA), legend.text = element_text(size = 7),
        legend.key.size = unit(.4, 'cm'))

# Panel styling and axes (the axis titles are set by the scales)

po2014 <- po2014 + 
  ggtitle("") + 
  theme(panel.background = element_rect(fill = "white", colour = "white"),
        plot.title = element_text(size = 12, hjust = 0.5), panel.grid.major = element_blank(), 
        panel.grid.minor = element_blank(), panel.border = element_rect(colour = "black", fill = NA, linewidth = 1),
        axis.title = element_text(size = 9.5), axis.text = element_text(size = 9.5),
        axis.ticks.x = element_line(linewidth = .5), axis.ticks.y = element_line(linewidth = .5),
        axis.title.y = element_text(margin = margin(t = 0, r = 8, b = 0, l = 0)),
        axis.title.x = element_text(margin = margin(t = 8, r = 0, b = 0, l = 0))) + 
  scale_x_reverse("Number of Days Before the Election", limits = c(40, 0), breaks = seq(0, 40, 10), expand = expansion(mult = c(0.02, 0.03))) +
  scale_y_continuous("Percentage", limits = c(0, 50), breaks = seq(0, 50, 10), expand = expansion(mult = c(0.03, 0.03)))

# Markers at x = 0 (a dot inside a hollow diamond per party) and a dashed
# vertical reference line at x = 0. annotate() draws each mark once; a geom
# with constant aes() values would be redrawn for every row of the poll data.
# NOTE(review): the y values are presumably the parties' election-day vote
# shares - confirm.

po2014 <- po2014 + 
  annotate("point", x = 0, y = 38.67, colour = colors[["olib"]], size = 2) +
  annotate("point", x = 0, y = 38.67, shape = 5, colour = colors[["olib"]], size = 4, stroke = 1) +
  annotate("point", x = 0, y = 31.23, colour = colors[["ocon"]], size = 2) +
  annotate("point", x = 0, y = 31.23, shape = 5, colour = colors[["ocon"]], size = 4, stroke = 1) +
  annotate("point", x = 0, y = 23.75, colour = colors[["onew"]], size = 2) +
  annotate("point", x = 0, y = 23.75, shape = 5, colour = colors[["onew"]], size = 4, stroke = 1) +
  annotate("point", x = 0, y = 4.89, colour = colors[["ogrn"]], size = 2) +
  annotate("point", x = 0, y = 4.89, shape = 5, colour = colors[["ogrn"]], size = 4, stroke = 1) +
  annotate("segment", x = 0, y = 0, xend = 0, yend = 50, linetype = 2)

# Render the finished plot once; print() is explicit so that the figure is
# also written when the script is run through source().

tiff("on2014.tiff", units = "in", width = 6, height = 5, res = 300)

print(po2014)

dev.off()

################################################################################
## FIGURE C6: VOTE INTENTION POLLS - QUEBEC 2022
################################################################################

# Poll data for this figure, read from Stata. The columns used below are
# `time` (x-axis) and one vote-intention column per party: caq, plq, pq, qs, pcq.
qc2022 <- read_dta("polls/qc2022_polls.dta")

# Line colours keyed by the constant strings mapped to `color` in the
# geom_smooth() layers below.
colors <- c("qcaq" = "#00a9e7", "qplq" = "#eb1f30", "qpq" = "#0070c0", 
            "qqs" = "#ff5505", "qpcq" = "#172853")

tiff("qc2022.tiff", units="in", width=6, height=5, res=300)

# Step 1: semi-transparent raw poll readings, one point layer per party.
# NOTE: from here on `qc2022` holds the plot object, not the data frame.
qc2022 <- ggplot(qc2022, aes(x = time)) + 
  geom_point(aes(y = caq), color = "#00a9e7", alpha = 1/4) + 
  geom_point(aes(y = plq), color = "#eb1f30", alpha = 1/4) +
  geom_point(aes(y = pq), color = "#0070c0", alpha = 1/4) +
  geom_point(aes(y = qs), color = "#ff5505", alpha = 1/4) +
  geom_point(aes(y = pcq), color = "#172853", alpha = 1/4)
qc2022

# Step 2: loess trend lines (span 0.1, no confidence band) and the legend.
# `linewidth` replaces the deprecated `size` argument for line geoms.
# The legend labels are listed in the sorted order of the colour keys
# (qcaq, qpcq, qplq, qpq, qqs).
qc2022 <- qc2022 +
  geom_smooth(aes(y = caq, color = "qcaq"), se = FALSE, method = loess, span = 0.1, linewidth = 1) + 
  geom_smooth(aes(y = plq, color = "qplq"), se = FALSE, method = loess, span = 0.1, linewidth = 1) + 
  geom_smooth(aes(y = pq, color = "qpq"), se = FALSE, method = loess, span = 0.1, linewidth = 1) + 
  geom_smooth(aes(y = qs, color = "qqs"), se = FALSE, method = loess, span = 0.1, linewidth = 1) +
  geom_smooth(aes(y = pcq, color = "qpcq"), se = FALSE, method = loess, span = 0.1, linewidth = 1) +
  labs(color = "") +
  scale_color_manual(labels = c("CAQ", "PCQ", "PLQ", "PQ", "QS"), 
                     values = colors) +
  guides(color=guide_legend(nrow=2, byrow=TRUE)) + theme(
    legend.position = c(.92, 1.02), legend.justification = c("right", "top"),
    legend.box.just = "right", legend.margin = margin(0, 0, 0, 0),
    legend.background = element_rect(fill="NA"),legend.text=element_text(size=7),
    legend.key.size = unit(.4, 'cm'))
qc2022

# Step 3: final styling -- white panel with black border, reversed x-axis
# (40 -> 0 days), y-axis 0-50, and per-party markers at x = 0 (a filled dot
# plus a hollow diamond; presumably the official vote shares -- TODO confirm).
#
# Markers and reference lines use annotate() instead of geom_*(aes(...)) with
# constants: such geom layers inherit the poll data and are drawn once per
# data row, whereas annotate() draws each mark exactly once.
# NOTE(review): the dashed reference lines sit at x = 1 and x = 27; what these
# two days represent is not visible from this section -- confirm.
qc2022 <- qc2022 + ylab("Percentage") + xlab("Number of Days Before the Election") + 
  ggtitle("") + 
  theme(panel.background = element_rect(fill = "white", colour = "white"),
        plot.title = element_text(size = 12, hjust = 0.5),
        panel.grid.major = element_blank(), 
        panel.grid.minor = element_blank(),
        panel.border = element_rect(colour = "black", fill = NA, linewidth = 1),
        axis.title = element_text(size = 9.5),
        axis.text = element_text(size = 9.5),
        axis.ticks.x = element_line(linewidth = .5),
        axis.ticks.y = element_line(linewidth = .5),
        axis.title.y = element_text(margin = margin(t = 0, r = 8, b = 0, l = 0)),
        axis.title.x = element_text(margin = margin(t = 8, r = 0, b = 0, l = 0))) + 
  scale_x_reverse("Number of Days Before the Election", limits = c(40, 0),
                  breaks = seq(0, 40, 10),
                  expand = expansion(mult = c(0.02, 0.03))) +
  scale_y_continuous("Percentage", limits = c(0, 50), breaks = seq(0, 50, 10),
                     expand = expansion(mult = c(0.03, 0.03))) +
  annotate("point", x = 0, y = 40.98, colour = "#00a9e7", size = 2) +
  annotate("point", x = 0, y = 40.98, shape = 5, colour = "#00a9e7", size = 4, stroke = 1) +
  annotate("point", x = 0, y = 14.37, colour = "#eb1f30", size = 2) +
  annotate("point", x = 0, y = 14.37, shape = 5, colour = "#eb1f30", size = 4, stroke = 1) +
  annotate("point", x = 0, y = 14.61, colour = "#0070c0", size = 2) +
  annotate("point", x = 0, y = 14.61, shape = 5, colour = "#0070c0", size = 4, stroke = 1) +
  annotate("point", x = 0, y = 15.43, colour = "#ff5505", size = 2) +
  annotate("point", x = 0, y = 15.43, shape = 5, colour = "#ff5505", size = 4, stroke = 1) +
  annotate("point", x = 0, y = 12.91, colour = "#172853", size = 2) +
  annotate("point", x = 0, y = 12.91, shape = 5, colour = "#172853", size = 4, stroke = 1) +
  annotate("segment", x = 1, y = 0, xend = 1, yend = 50, linetype = 2) +
  annotate("segment", x = 27, y = 0, xend = 27, yend = 50, linetype = 2)
qc2022

# Close the tiff device so qc2022.tiff is written to disk.
dev.off()

################################################################################
## FIGURE D1: SCATTERPLOT
################################################################################

# Build one row per district_code x election with three group-level shares:
#   vote_district_prop      - share of rows with vote_district == "Yes"
#   pidstatus_district_prop - share of rows with pidstatus_district == 3
#   correct_district_prop   - share of rows with correct_district == 1
# Missing values are ignored when computing each share.
distribution.vote <- merge %>%
  group_by(district_code, election) %>%
  mutate(
    vote_district_prop = mean(vote_district == "Yes", na.rm = TRUE),
    pidstatus_district_prop = mean(pidstatus_district == 3, na.rm = TRUE),
    correct_district_prop = mean(correct_district == 1, na.rm = TRUE)
  ) %>%
  # The data are still grouped here, so distinct() also takes the grouping
  # variables into account: one row is kept per district_code x election,
  # along with all other columns (.keep_all = TRUE).
  distinct(district_code, .keep_all = TRUE) %>%
  # Drop the grouping so later operations are not silently grouped.
  ungroup()

# mean() already returns plain doubles, so the three *_prop columns need no
# further as.numeric() conversion.

tiff("scatter.tiff", units="in", width=10, height=5, res=1200)

# Scatterplot of the district-level share of correct forecasts against the
# district-level share of "Yes" on vote_district, one panel per election, with
# a linear fit (no confidence band) and the Pearson correlation printed in
# each panel.
# `linewidth` replaces the deprecated `size` argument for the fitted line;
# `size` in stat_cor() is a text size and is left as is.
# NOTE(review): both axis titles say "Percentage" while the scales run from
# 0 to 1 (proportions) -- confirm this is intended.
scatter.g <- ggplot(distribution.vote, aes(x=vote_district_prop, y=correct_district_prop)) + 
  geom_point(alpha=0.5) +
  geom_smooth(method=lm, se=FALSE, color="#c30010", linewidth=.5) +
  scale_x_continuous("Percentage of Winners", 
                     limits=c(0,1), breaks = seq(0,1,.2)) +
  scale_y_continuous("Percentage of Correct Forecasts", 
                     limits=c(0,1), breaks = seq(0,1,.2)) +
  stat_cor(method = "pearson", label.x = 0.55, label.y = 0.005, size = 3.5) +
  facet_wrap(~election) +
  theme_bw() + theme(axis.title = element_text(size = 12), 
                     axis.text = element_text(size = 12),
                     strip.text = element_text(size=12),
                     axis.title.x = element_text(margin = margin(t = 5, r = 0, b = 0, l = 0)),
                     axis.title.y = element_text(margin = margin(t = 0, r = 5, b = 0, l = 0)))

scatter.g

# Close the tiff device so scatter.tiff is written to disk.
dev.off()

################################################################################
## FIGURE D2: DISTRIBUTION
################################################################################

tiff("distribution_ridges_vote_district.tiff", units="in", width=8, height=5, res=1200)

# Ridgeline densities of vote_district_prop, one ridge per election, all
# filled grey, with a dashed vertical reference line at 0.5.
#
# The original chain called theme_ridges() and then theme_bw(); theme_bw() is
# a complete theme and replaces whatever theme was set before it, so the
# theme_ridges() call had no effect and has been dropped.
# `linewidth` replaces the deprecated `size` argument for the reference line.
distribution.ridges.vote.g <- ggplot(distribution.vote, aes(x = vote_district_prop, y = election, fill = election)) +
  geom_density_ridges(alpha = 0.7) +
  geom_vline(xintercept=.5, linetype="dashed", color="black", linewidth=.3) +
  # Do not clip drawing to the panel area.
  coord_cartesian(clip = "off") +
  theme_bw() +
  theme(legend.position = "none",
        axis.title = element_text(size = 12), 
        axis.text = element_text(size = 12),
        axis.title.x = element_text(margin = margin(t = 5, r = 0, b = 0, l = 0))) + 
  scale_x_continuous("Percentage", 
                     limits=c(0,1), breaks = seq(0,1,.2)) +
  scale_y_discrete("", expand = c(-0.02, 0.3)) + 
  # Same grey for every ridge (six fill values supplied).
  scale_fill_manual(values = c("grey","grey","grey","grey","grey","grey"))

distribution.ridges.vote.g

# Close the tiff device so the figure is written to disk.
dev.off()

################################################################################
## FIGURE D3: WISDOM OF CROWDS (LOSERS ONLY)
################################################################################

# Read the table behind this figure. Columns used below: election, percent,
# level and comp.
woc.comp.losers <- sjlabelled::read_stata("data/woc_comp_losers.dta")

# Fix the left-to-right order of the elections on the x-axis, "All" last.
woc.comp.losers$election <- factor(
  woc.comp.losers$election,
  levels = c("CA 2011", "CA 2015", "CA 2019",
             "ON 2011", "ON 2014", "QC 2022",
             "All"),
  ordered = TRUE
)

tiff("woc_comp_losers.tiff", units = "in", width = 10, height = 5, res = 1200)

# Two overlaid sets of dodged bars, filled by forecast level:
#   * wide (0.9), faded bars for rows with comp == "with" (kept out of the legend);
#   * narrower (0.6), solid bars for rows with comp == "without".
# Each layer receives the plot data through a function that keeps its own rows.
woc.comp.losers.g <- ggplot(
  woc.comp.losers,
  aes(x = election, y = percent, fill = as.factor(level))
) +
  geom_bar(
    data = function(d) filter(d, comp == "with"),
    stat = "identity",
    position = "dodge",
    width = 0.9,
    alpha = .25,
    show.legend = FALSE
  ) +
  geom_bar(
    data = function(d) filter(d, comp == "without"),
    stat = "identity",
    position = position_dodge(width = 0.9),
    width = 0.6
  ) +
  scale_y_continuous("% Correct Forecasts", limits = c(0, 100), breaks = seq(0, 100, 20)) +
  scale_x_discrete("") +
  scale_fill_manual(
    name = "Forecast Level",
    labels = c("Individual", "District"),
    values = c("#0072B2", "#009E73")
  )

# Draw the figure with its text sizes and a bottom legend. The themed plot is
# only printed; `woc.comp.losers.g` itself keeps the unthemed version.
woc.comp.losers.g +
  theme(
    legend.position = "bottom",
    legend.title = element_text(size = 14),
    legend.text = element_text(size = 12),
    plot.title = element_text(size = 14, hjust = 0.5),
    axis.title.x = element_blank(),
    axis.title.y = element_text(margin = margin(t = 0, r = 8, b = 0, l = 0), size = 15),
    axis.text.x = element_text(size = 14),
    axis.text.y = element_text(size = 14)
  )

# Close the tiff device so woc_comp_losers.tiff is written to disk.
dev.off()

################################################################################
## FIGURE D4: WISDOM OF CROWDS (WINNERS ONLY)
################################################################################

# Read the table behind this figure. Columns used below: election, percent,
# level and comp.
woc.comp.winners <- sjlabelled::read_stata("data/woc_comp_winners.dta")

# Fix the left-to-right order of the elections on the x-axis, "All" last.
woc.comp.winners$election <- factor(
  woc.comp.winners$election,
  levels = c("CA 2011", "CA 2015", "CA 2019",
             "ON 2011", "ON 2014", "QC 2022",
             "All"),
  ordered = TRUE
)

tiff("woc_comp_winners.tiff", units = "in", width = 10, height = 5, res = 1200)

# Two overlaid sets of dodged bars, filled by forecast level:
#   * wide (0.9), faded bars for rows with comp == "with" (kept out of the legend);
#   * narrower (0.6), solid bars for rows with comp == "without".
# Each layer receives the plot data through a function that keeps its own rows.
woc.comp.winners.g <- ggplot(
  woc.comp.winners,
  aes(x = election, y = percent, fill = as.factor(level))
) +
  geom_bar(
    data = function(d) filter(d, comp == "with"),
    stat = "identity",
    position = "dodge",
    width = 0.9,
    alpha = .25,
    show.legend = FALSE
  ) +
  geom_bar(
    data = function(d) filter(d, comp == "without"),
    stat = "identity",
    position = position_dodge(width = 0.9),
    width = 0.6
  ) +
  scale_y_continuous("% Correct Forecasts", limits = c(0, 100), breaks = seq(0, 100, 20)) +
  scale_x_discrete("") +
  scale_fill_manual(
    name = "Forecast Level",
    labels = c("Individual", "District"),
    values = c("#0072B2", "#009E73")
  )

# Draw the figure with its text sizes and a bottom legend. The themed plot is
# only printed; `woc.comp.winners.g` itself keeps the unthemed version.
woc.comp.winners.g +
  theme(
    legend.position = "bottom",
    legend.title = element_text(size = 14),
    legend.text = element_text(size = 12),
    plot.title = element_text(size = 14, hjust = 0.5),
    axis.title.x = element_blank(),
    axis.title.y = element_text(margin = margin(t = 0, r = 8, b = 0, l = 0), size = 15),
    axis.text.x = element_text(size = 14),
    axis.text.y = element_text(size = 14)
  )

# Close the tiff device so woc_comp_winners.tiff is written to disk.
dev.off()